# "Efficiency and water use: Dynamic effects of irrigation technology adoption"
# by Micah Cameron-Harp and Nathan Hendricks
# 
# Code written by Micah Cameron-Harp
# May 16th, 2024
# 
# This r script separately estimates the effects of irrigators transitioning from flood
#   irrigation to traditional center pivot and from flood irrigation directly
#   to LEPA. The results are depicted in Table C.4 and are stored in the resulting
#   csv file, titled "tableC4.csv" in the outputs/appendices folder.

pacman::p_load(
  did,
  tidyverse,
  openxlsx,
  dplyr)

# Parse arguments submitted by STATA
args = commandArgs(trailingOnly = "TRUE")
arg1 <- args[1]

#Define directories and set working directory
# NOTE - The root directory will be passed here from the main_text_results.do file 
dr_root <- paste0(arg1)
dr_data <- paste0(dr_root, "/data")
dr_output = paste0(dr_root, "/outputs")
dr_output_main = paste0(dr_root, "/outputs/main_text")
dr_output_app = paste0(dr_root, "/outputs/appendices")
dr_output_log = paste0(dr_root, "/outputs/logs")
dr_temp = paste0(dr_root, "/data/intermediate")
setwd(dr_temp)

########################################
#---------- Flood to only Traditional Center Pivot -----#
wrg_flood_cplepa <- read.csv(file = file.path(dr_temp, "flood_cporlepa_prepped.csv"))

#Filter the data to only years before 2016
wrg_flood_cplepa <- wrg_flood_cplepa %>% dplyr::filter(wua_year <= 2015)

#Create a new dataframe to store the results in 
df_template <- data.frame(matrix(ncol=6,nrow=0, 
                                 dimnames=list(NULL, c("dep_var", "effect_estimated",
                                                       "estimate", "se_estimate", 
                                                       "c_crit_val", "panel_mean_dep_var"))))
#Make one version for the ATT's
att_df <- df_template
#Make another for the cohort effects
grp_df <- df_template
#Next version for the event study coefficients
dyn_df <- df_template

#List the dependent variables
effect_order <- c("af_used", "acres_irr", "depth_applied")

###########
#Only flood to lepa
wrg_flood_cp <- wrg_flood_cplepa %>% dplyr::filter(num_switch_flood_cp==1)
###########
#Filter years so it isn't computationally singular
wrg_flood_cp <- wrg_flood_cp %>% dplyr::filter(wua_year >=1991 & wua_year <= 2000)

for (i in effect_order) {
  dep_var <- i
  #Make a df of just the dep var so we can get its mean value
  df_dep_var <- wrg_flood_cp %>% dplyr::select(all_of(dep_var))
  mean_dep_var <- mean(df_dep_var[,1], na.rm = TRUE)
  #Run att_gt Callaway and Sant'Anna using Not Yet Treated as Control Group
  sharp_floodcp_attgt_nyt <- att_gt(yname = dep_var,
                                    tname = "wua_year",
                                    idname = "WR_GROUP",
                                    gname = "cohort_flood_cplepa",
                                    data = wrg_flood_cp,
                                    panel = TRUE,
                                    allow_unbalanced_panel = TRUE,
                                    xformla=~hyd_cond+predev_sat+awc+sandtotal+silttotal+init_af_used,
                                    est_method = "dr",
                                    control_group = c("notyettreated"),
                                    anticipation=0,
                                    clustervars = "WR_GROUP", 
                                    biters=1000
  )
  #Group specific effects to get overall ATT
  agg.gs <- aggte(sharp_floodcp_attgt_nyt, type = "group", na.rm = TRUE, min_e = 0, max_e = Inf)
  #Store ATT  
  att_df[nrow(att_df)+1,] <- c(dep_var, "group_agg_effect_average", agg.gs["overall.att"], agg.gs["overall.se"], agg.gs["crit.val.egt"], mean_dep_var) 
  #Store cohort effects
  agg.gs <- aggte(sharp_floodcp_attgt_nyt, type = "group", na.rm = TRUE, min_e = -Inf, max_e = Inf)
  group_order <- c(agg.gs[["egt"]])
  g_est_lst <- c(agg.gs[["att.egt"]])
  g_se_lst <- c(agg.gs[["se.egt"]])
  g_c <- agg.gs$crit.val.egt
  for (i in 1:length(group_order)) {
    grp_df[nrow(grp_df)+1,] <- c(dep_var, group_order[i], g_est_lst[i], g_se_lst[i], g_c, mean_dep_var)
  }
  
  #Dynamic effects
  agg.es <- aggte(sharp_floodcp_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = 0, max_e = Inf)
  #Store the att  
  att_df[nrow(att_df)+1,] <- c(dep_var, "event_agg_effect_average", agg.es["overall.att"], agg.es["overall.se"], agg.es["crit.val.egt"], mean_dep_var) 
  #Store the event study coefficients
  agg.es <- aggte(sharp_floodcp_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf)
  effect_order <- c(agg.es[["egt"]])
  est_lst <- c(agg.es[["att.egt"]])
  se_lst <- c(agg.es[["se.egt"]])
  e_c <- agg.es$crit.val.egt
  for (i in 1:length(effect_order)) {
    dyn_df[nrow(dyn_df)+1,] <- c(dep_var, effect_order[i], est_lst[i], se_lst[i], e_c, mean_dep_var)
  }
}
#Create the 95% ci variables
att_df <- att_df %>%  mutate(ci_95_lb = estimate - c_crit_val*se_estimate,
                             ci_95_ub = estimate + c_crit_val*se_estimate)
dyn_df <- dyn_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                             ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
grp_df <- grp_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                             ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
#Save each as .rds
write_rds(att_df, file = file.path(dr_temp, "/cs_floodtocp_att.rds"))
write_rds(dyn_df, file = file.path(dr_temp, "/cs_floodtocp_dyn.rds"))
write_rds(grp_df, file = file.path(dr_temp, "/cs_floodtocp_grp.rds"))

#Save as excel workbook
dataset_names <- list('cs_att' = att_df, 'cs_event' = dyn_df, 'cs_group' = grp_df)
write.xlsx(dataset_names, file = file.path(dr_temp, '/cs_floodtocp.xlsx'))

########################################
#---------- Flood to Only LEPA -----#
wrg_flood_cplepa <- read.csv(file = file.path(dr_temp, "flood_cporlepa_prepped.csv"))

#Filter the data to only include years before 2016 
wrg_flood_cplepa <- wrg_flood_cplepa %>% dplyr::filter(wua_year <= 2015)

#Create a new dataframe to store the results in 
df_template <- data.frame(matrix(ncol=6,nrow=0, 
                                 dimnames=list(NULL, c("dep_var", "effect_estimated",
                                                       "estimate", "se_estimate", 
                                                       "c_crit_val", "panel_mean_dep_var"))))
#Make one version for the ATT's
att_df <- df_template
#Make another for the cohort effects
grp_df <- df_template
#Next version for the event study coefficients
dyn_df <- df_template

#List the dependent variables
effect_order <- c("af_used", "acres_irr", "depth_applied")

###########
#Only flood to lepa
wrg_flood_lepa <- wrg_flood_cplepa %>% dplyr::filter(num_switch_flood_lepa==1)
###########
#Filter years so it isnt computationally singular
wrg_flood_lepa <- wrg_flood_lepa %>% dplyr::filter(wua_year >=1996 & wua_year <= 2005)

for (i in effect_order) {
  dep_var <- i
  #Make a df of just the dep var so we can get its mean value
  df_dep_var <- wrg_flood_lepa %>% dplyr::select(all_of(dep_var))
  mean_dep_var <- mean(df_dep_var[,1], na.rm = TRUE)
  #Run att_gt Callaway and Sant'Anna using Not Yet Treated as Control Group
  sharp_floodlepa_attgt_nyt <- att_gt(yname = dep_var,
                                        tname = "wua_year",
                                        idname = "WR_GROUP",
                                        gname = "cohort_flood_cplepa",
                                        data = wrg_flood_lepa,
                                        panel = TRUE,
                                        allow_unbalanced_panel = TRUE,
                                        xformla=~hyd_cond+predev_sat+awc+sandtotal+silttotal+init_af_used,
                                        est_method = "dr",
                                        control_group = c("notyettreated"),
                                        anticipation=0,
                                        clustervars = "WR_GROUP", 
                                        biters=1000
  )
  #Group specific effects to get overall ATT
  agg.gs <- aggte(sharp_floodlepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = 0, max_e = Inf)
  #Store ATT  
  att_df[nrow(att_df)+1,] <- c(dep_var, "group_agg_effect_average", agg.gs["overall.att"], agg.gs["overall.se"], agg.gs["crit.val.egt"], mean_dep_var) 
  #Store cohort effects
  agg.gs <- aggte(sharp_floodlepa_attgt_nyt, type = "group", na.rm = TRUE, min_e = -Inf, max_e = Inf)
  group_order <- c(agg.gs[["egt"]])
  g_est_lst <- c(agg.gs[["att.egt"]])
  g_se_lst <- c(agg.gs[["se.egt"]])
  g_c <- agg.gs$crit.val.egt
  for (i in 1:length(group_order)) {
    grp_df[nrow(grp_df)+1,] <- c(dep_var, group_order[i], g_est_lst[i], g_se_lst[i], g_c, mean_dep_var)
  }
  
  #Dynamic effects
  agg.es <- aggte(sharp_floodlepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = 0, max_e = Inf)
  #Store the att  
  att_df[nrow(att_df)+1,] <- c(dep_var, "event_agg_effect_average", agg.es["overall.att"], agg.es["overall.se"], agg.es["crit.val.egt"], mean_dep_var) 
  #Store the event study coefficients
  agg.es <- aggte(sharp_floodlepa_attgt_nyt, type = "dynamic", na.rm = TRUE, min_e = -Inf, max_e = Inf)
  effect_order <- c(agg.es[["egt"]])
  est_lst <- c(agg.es[["att.egt"]])
  se_lst <- c(agg.es[["se.egt"]])
  e_c <- agg.es$crit.val.egt
  for (i in 1:length(effect_order)) {
    dyn_df[nrow(dyn_df)+1,] <- c(dep_var, effect_order[i], est_lst[i], se_lst[i], e_c, mean_dep_var)
  }
}
#Create the 95% ci variables
att_df <- att_df %>%  mutate(ci_95_lb = estimate - c_crit_val*se_estimate,
                             ci_95_ub = estimate + c_crit_val*se_estimate)
dyn_df <- dyn_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                             ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
grp_df <- grp_df %>%  mutate(ci_95_lb = as.numeric(estimate) - as.numeric(c_crit_val)*as.numeric(se_estimate),
                             ci_95_ub = as.numeric(estimate) + as.numeric(c_crit_val)*as.numeric(se_estimate))
#Save each as .rds
write_rds(att_df, file.path(dr_temp, "/cs_floodtolepa_att.rds"))
write_rds(dyn_df, file.path(dr_temp, "/cs_floodtolepa_dyn.rds"))
write_rds(grp_df, file.path(dr_temp, "/cs_floodtolepa_grp.rds"))

#Save as excel workbook
dataset_names <- list('cs_att' = att_df, 'cs_event' = dyn_df, 'cs_group' = grp_df)
write.xlsx(dataset_names, file.path(dr_temp, "/cs_floodtolepa.xlsx"))

############### Table C.4 ###########################
#Merge the ATT estimates for the two separate transitions from 
# flood irrigation into one spreadsheet.
flcp_att_df <- read.xlsx(paste0(dr_temp, "/cs_floodtocp.xlsx"), sheet = "cs_att") %>%
  dplyr::filter(effect_estimated == "group_agg_effect_average") %>%
  dplyr::mutate(transition = "flood to traditional center pivot")
fllepa_att_df <- read.xlsx(paste0(dr_temp, "/cs_floodtolepa.xlsx"), sheet = "cs_att") %>%
  dplyr::filter(effect_estimated == "group_agg_effect_average") %>%
  dplyr::mutate(transition = "flood to LEPA")
output_df <- rbind(flcp_att_df, fllepa_att_df) 
#save
write.csv(output_df, file = file.path(dr_output_app, "/tableC4.csv"))
